#---------------------------------------------------------------
# City-Defined Neighborhoods Database
#
# 01_descriptives.R
# Creates main figures for paper (Figs. 2-6).
#---------------------------------------------------------------
# NOTE: If you have not run 00_unzip_files.R, you must do so for shapefiles to load properly


rm(list=ls())
gc()
library(tidyverse)
library(sf)
library(patchwork)
library(segregation)
library(units)
library(tinytiger)
library(zctaCrosswalk)
library(glue)
library(gt)
library(stringr)

# set working directory to data_release repository top directory

theme_neigh <- theme_bw(base_size = 18) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        strip.background = element_rect(fill = "#F5F1E3"),
        strip.text = element_text(size = 18),
        axis.title = element_text(face = "bold"))

DATA_PATH <- "cdnd_data/"
#---------------------------------------------------------------

# Load in list of all cities in data with information on what kind of neighborhood was identified for each city
city_list = read_csv('other_data/city_list.csv')

sum(city_list$pop_2022_acs[city_list$Neighborhood == TRUE | city_list$NHA == TRUE])

###. Table 1: Counts of neighborhood types across cities in the CDND for each of the 336 cities in our target sample.

city_list |>
  group_by(Neighborhood,NHA, HOA)|>
  summarise(n = n(),
             n_neighborhoods = sum(n_Neighborhood,na.rm=T),
            n_nha = sum(n_NHA,na.rm=T),
            pop = sum(pop_2022_acs,na.rm=T)
            ) |>
  mutate(type = case_when(
    Neighborhood | NHA ~ 'Neighborhood sample',
    !(Neighborhood|NHA) & HOA ~ 'HOA',
    !Neighborhood & !NHA & !HOA ~ 'No data'
  ),

  n_Neighborhood = case_when(
    Neighborhood ~ n_neighborhoods,
    NHA ~ n_nha,
    T ~ NA
  )
  ) |>
  mutate(type2 = case_when(

      Neighborhood  ~ 'Official neighborhoods',
      NHA ~ 'Neighborhood associations',
      HOA ~ 'Housing associations',
      T ~ 'No data'

  ))|>
  group_by(type2)|>
  summarise(Cities = sum(n),
            Neighborhoods = sum(n_Neighborhood),
            `Pop.` = sum(pop))|>
  slice(4,2,1,3)|>
  gt()|>
  cols_label(type2='')|>
  fmt_missing(missing_text = '-')|>
  as_latex()|>
  as.character()|>
  str_replace_all('longtable', 'tabular')|>
  write_file('output/city_list.tex')



## Tables A1-A3 in the appendix: List of all cities in data with information on what kind of neighborhood was found for each

reformat_city_state <- function(text) {
  # Step 1: Insert a space between any lowercase letter followed by an uppercase letter
  formatted <- gsub("([a-z])([A-Z])", "\\1 \\2", text, perl = TRUE)
  # Step 2: Insert a comma before the state abbreviation (the last two uppercase letters)
  formatted <- gsub(" ([A-Z]{2})$", ", \\1", formatted, perl = TRUE)
  return(formatted)
}

n= nrow(city_list)

# split cities into double column tables of 60 cities per column
iter = tibble(
  iter1 = c(1, 61, 121, 181, 241, 301),
  iter2 = c(60, 120, 180, 240, 300, n)
)

for(i in 1:nrow(iter)){
  city_list |>
    mutate(City = reformat_city_state(city),
           Pop = scales::comma(pop_2022_acs),
           NBHD = if_else(Neighborhood == T, 'Yes', 'No'),
           NHA = if_else(NHA == T, 'Yes','No'),
           HOA = if_else(HOA == T, 'Yes', 'No'),
           Rank = 1:n())|>
    select(Rank, City, Pop, NBHD, NHA, HOA)|>
    slice(iter$iter1[i]:iter$iter2[i])|>
    gt()|>
    cols_label('Rank'='')|>
    as_latex()|>
    as.character()|>
    str_replace_all('longtable', 'tabular')|>
    write_file(glue('output/list_of_all_cities-{i}.tex'))

}


# Filter city list to cities where we found Neighborhoods or NHAs for analysis
city_list = city_list |>
  filter(Neighborhood|NHA)

# Load in cleaned neighborhood shapefiles.
onm_files <- city_list |>
  mutate(file = if_else(Neighborhood == T,
                        paste0(DATA_PATH,city,'/',city,'_onm_cleaned.shp'),
                        paste0(DATA_PATH,city,'/',city,'_nha_cleaned.shp')

                        ))|>
  select(file)|>
  as.vector()|>
  unlist()



onm <- lapply(onm_files, function(tfile) {
  d <- st_read(tfile)
  d$filename <- tfile
  d
})

length(onm)
# Start building components of Figures 2 and 3
# Calculate area for each neighborhood
areas <- lapply(onm, function(ll) {

  # either nha or onm
  type <- colnames(ll)[1]

  z <- st_make_valid(ll)
  tibble(type = type,
         res = units::set_units(st_area(z), miles^2),
         filename = unique(ll$filename))

})


# Quantiles of neighborhood land area
bind_rows(areas) %>%
  mutate(res = drop_units(res)) %>% pull(res) %>% quantile()

# Quantile of neighborhood land area in Philadelphia
bind_rows(areas) %>%
  filter(filename == "cdnd_data/PhiladelphiaPA/PhiladelphiaPA_onm_cleaned.shp") %>%
  mutate(res = drop_units(res)) %>% pull(res) %>% quantile()


#------------------------------------------------------------------
# Area plot function

plot_area <- function(type = "Full") {

  if (type == "Full") {
    to_plot <- bind_rows(areas) %>%
      filter(!is.na(id))
  }

  if (type == "Philly") {

    to_plot <- bind_rows(areas) %>%
      filter(!is.na(id)) %>%
      mutate(is_philly = str_detect(filename, "PhiladelphiaPA"),
             is_philly = ifelse(is_philly, 1, 0)) %>%
      filter(is_philly == 1)
  }

  to_plot %>%
    mutate(res = drop_units(res)) %>%
    filter(res < 13.5) %>%
    filter(type %in% c("nbhd", "nha")) %>%
    mutate(logunits = log(res),
           label = ifelse(type == "nha", "NHA",
                          ifelse(type == "nbhd", "Neighborhood", NA))) %>%
    ggplot(aes(x = res)) +
    geom_histogram(fill = "#FFCB05", color = "white") +
    labs(y = "Count", x = "Neighborhood Area\n(Sq. Miles)") +
    scale_y_continuous(labels = scales::comma) +
    theme_neigh +
    guides(fill = "none")

}


#------------------------------------------------------------------
#--------------------------------------------------------------------
# Read in block-level city neighborhood data for demographic analysis

analysis_files <- city_list |>
  mutate(file =  paste0(DATA_PATH,city,'/',city,'.rds'))|>
  select(file)|>
  as.vector()|>
  unlist()

# Read ZCTA / GEOID crosswalk to link to block data
z_cw = read_delim('https://www2.census.gov/geo/docs/maps-data/data/rel2020/zcta520/tab20_zcta520_tabblock20_natl.txt', delim = '|')|>
  select(ZCTA=GEOID_ZCTA5_20, GEOID=GEOID_TABBLOCK_20)|>
  drop_na()

cities <- lapply(analysis_files, function(tfile) {
  d <- read_rds(tfile)
  d$filename <- tfile
  d |>
    left_join(z_cw) |>
    mutate(
      tractfips = paste0(state,county,tract),
      bgfips =  paste0(state,county,tract, substr(block,1,1)))
})


# get list of unique zctas, tracts, and block group across cities,
# calculate pop and pop_black in each geography

zctas = lapply(cities, function(city_df){

city_df |>
    st_drop_geometry() |>
    group_by(filename,ZCTA)|>
    summarise(pop = sum(pop, na.rm = T),
              pop_black = sum(pop_black, na.rm=T))



}) |>
  do.call(what='bind_rows')

tracts = lapply(cities, function(city_df){

  city_df |>
    st_drop_geometry() |>
    group_by(filename,tractfips)|>
    summarise(pop = sum(pop, na.rm = T),
              pop_black = sum(pop_black, na.rm=T))



}) |>
  do.call(what='bind_rows')

block_groups = lapply(cities, function(city_df){

  city_df |>
    st_drop_geometry() |>
    group_by(filename,bgfips)|>
    summarise(pop = sum(pop, na.rm = T),
              pop_black = sum(pop_black, na.rm=T))



}) |>
  do.call(what='bind_rows')

######
gc()

# calculate area for zctas, tracts, and block groups and link to above created lists of each
zcta_tt = tt_zcta(year = 2020) |>
  filter(ZCTA5CE20 %in% zctas$ZCTA) |>
  mutate(res =  units::set_units(st_area(geometry), miles^2))|>
  st_drop_geometry() |>
  select(ZCTA = ZCTA5CE20, res)


tracts_tt = lapply(c(state.abb, 'DC'), FUN = function(st){tt_tracts(state=st, year=2020)})|>
  do.call(what='bind_rows')|>
  mutate(res =  units::set_units(st_area(geometry), miles^2) )|>
  st_drop_geometry() |>
  select(tractfips = GEOID, res)

block_groups_tt = lapply(c(state.abb, 'DC'), FUN = function(st){tt_block_groups(state=st, year=2020)})|>
  do.call(what='bind_rows')|>
  mutate(res =  units::set_units(st_area(geometry), miles^2) )|>
  st_drop_geometry() |>
  select(bgfips = GEOID, res)

gc()

zctas <- zctas |>
  left_join(zcta_tt)

tracts <- tracts |>
  left_join(tracts_tt)

block_groups <- block_groups |>
  left_join(block_groups_tt)

rm(zcta_tt,tracts_tt,block_groups_tt)
gc()

####


# Define population size function to calculate total population by neighborhood
pop_size <- lapply(cities, function(city_df) {

  ret_df <- NULL

  if (!sum(is.na(city_df$nbhd_id)) == nrow(city_df)) {
    x1 <- city_df %>%
      st_drop_geometry() %>%
      group_by(nbhd_id) %>%
      summarize(total_pop = sum(pop,na.rm=T),
                filename = unique(filename))

    x1$type <- "NBHD"
    x1 <- x1 %>% rename(id = nbhd_id)

    ret_df <- bind_rows(ret_df, x1)
  }

  if (!sum(is.na(city_df$nha_id)) == nrow(city_df)) {
    x2 <- city_df %>%
      st_drop_geometry() %>%
      group_by(nha_id) %>%
      summarize(total_pop = sum(pop,na.rm=T),
                filename = unique(filename))

    x2$type <- "NHA"
    x2 <- x2 %>% rename(id = nha_id)

    ret_df <- bind_rows(ret_df, x2)
  }

  ret_df

})

#------------------------------------------------------------------
# Population Plot function

plot_pop <- function(type = "Full") {

  if (type == "Full") {
    to_plot <- bind_rows(pop_size) %>%
      filter(!is.na(id))
  }

  if (type == "Philly") {

    to_plot <- bind_rows(pop_size) %>%
      filter(!is.na(id)) %>%
      mutate(is_philly = str_detect(filename, "PhiladelphiaPA"),
             is_philly = ifelse(is_philly, 1, 0)) %>%
      filter(is_philly == 1)

  }

  to_plot %>%
      ggplot(aes(x = total_pop)) +
      geom_histogram(fill = "#00274C", color = "white") +
      scale_x_log10(breaks = c(10, 1000, 100000),
                    labels = c("10", "1k", "100k")) +
      scale_y_continuous(labels = scales::comma) +
      labs(y = "Count", x = "Neighborhood Population") +
      theme_neigh
}


#------------------------------------------------------------------

# Calculate diversity across neighborhoods

seg_index <- lapply(cities, function(city_df) {

  ret_df <- NULL

  if (!sum(is.na(city_df$nbhd_id)) == nrow(city_df)) {
    seg <- city_df %>% st_drop_geometry() %>%
      select(nbhd_id, filename, pop, starts_with("pop_")) %>%
      pivot_longer(cols = starts_with("pop_")) %>%
      filter(pop >500) %>%
      mutate(nbhd_prop = value / pop) %>%
      group_by(nbhd_id) %>%
      summarize(max_prop = max(nbhd_prop),
                filename = unique(filename))

    seg$type <- "NBHD"
    # x1 <- x1 %>% rename(id = nbhd_id)

    ret_df <- bind_rows(ret_df, seg)
  }

  if (!sum(is.na(city_df$nha_id)) == nrow(city_df)) {
    seg <- city_df %>% st_drop_geometry() %>%
      select(nha_id, filename, pop, starts_with("pop_")) %>%
      pivot_longer(cols = starts_with("pop_")) %>%
      filter(pop > 500) %>%
      mutate(nbhd_prop = value / pop) %>%
      group_by(nha_id) %>%
      summarize(max_prop = max(nbhd_prop),
                filename = unique(filename))

    seg$type <- "NHA"
    # x2 <- x2 %>% rename(id = nha_id)

    ret_df <- bind_rows(ret_df, seg)
  }

  ret_df

})



#------------------------------------------------------------------
# Segregation Plot

plot_segregation <- function(type = "Full") {

  if (type == "Full") {
    to_plot <- bind_rows(seg_index)
  }

  if (type == "Philly") {

    to_plot <- bind_rows(seg_index) %>%
      mutate(is_philly = str_detect(filename, "PhiladelphiaPA"),
             is_philly = ifelse(is_philly, 1, 0)) %>%
      filter(is_philly == 1)

  }

  to_plot %>%
    ggplot(aes(x = max_prop)) +
    geom_histogram(fill = "#9A3324",
                   color = "white") +
    scale_y_continuous(labels = scales::comma) +
    scale_x_continuous(breaks = c(0.3, 0.6, 0.9),
                       labels = c("30%", "60%", "90%")) +
    labs(y = "Count", x = "Prop. Largest\nRacial/Ethnic Group") +
    theme_neigh

}


#---------------------------------------------------


# Create Philadelphia maps and figures (Figures 3 and 4 in manuscript)
philly_idx <- which(grepl('PhiladelphiaPA', analysis_files))


dd <- st_read("other_data/Philadelphia_onm.shp")


philly <- cities[[philly_idx]] %>%
  summarize(geometry = st_union(geometry))


(pmap <- dd %>%
    mutate(nid = 1:nrow(dd)) %>%
    ggplot() +
    geom_sf(data = philly, fill = NA, color = "black") +
    geom_sf(aes(fill = factor(nid)),
            color = "black", lwd = 0.05) +
    geom_sf_label(data = dd %>% filter(NAME %in% c("FISHTOWN", "UNIVERSITY CITY",
                                                   "GERMANTOWN")),
                  aes(label = tools::toTitleCase(tolower(NAME)), fontface = "bold"),
                  size = 5) +
    ggthemes::theme_map() +
    ggredist::scale_fill_randmcnally() +
    guides(fill = "none") +
    theme(plot.margin = unit(c(0,0,0,0), "cm"))
)


# Figure A1 across cities
raw <- read_csv('other_data/city_list.csv') |>
  mutate(City = reformat_city_state(city))

samp <- raw %>% filter(pop_2022_acs > 100000)

(
  p_samp <- samp %>%
    mutate(collected = ifelse(Neighborhood|NHA, "Yes", "No")) %>%
    ggplot(aes(x = reorder(City, desc(pop_2022_acs)),
               y = pop_2022_acs,
               fill = collected)) +
    geom_col(color = "black", lwd = 0.001) +
    scale_y_log10(labels = scales::comma, expand = c(0,0)) +
    scale_fill_manual(values = c("#FFCB05", "#00274C"),
                      name = "Neighborhoods Reported by City") +
    labs(x = "Sample City",
         y = "2022 (Logged) Population") +
    theme_neigh +
    theme(legend.position = "bottom",
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          legend.title = element_text(face = "bold", size = 20),
          panel.grid = element_blank())
)
ggsave(plot = p_samp, filename = "output/samp_size.png", width = 11, height = 6)




#------------------------------------------------------------

pf1 <- plot_pop("Full")
pf2 <- plot_area("Full")
pf3 <- plot_segregation("Full")

pf <- pf1 + pf2 + pf3
ggsave("output/sample.png", pf, width = 12, height = 3)

pp1 <- plot_pop("Philly")
pp2 <- plot_area("Philly")
pp3 <- plot_segregation("Philly")

design <- "
AAAAAA
AAAAAA
AAAAAA
AAAAAA
AAAAAA
BBCCDD
BBCCDD"





pphilly <- pmap + pp1 + pp2 + pp3 + plot_layout(nrow = 2, design = design)
ggsave("output/sample_philly.pdf", pphilly, width = 12, height = 8)

#------------------------------------------------------------

# Report median population size
 cities[[philly_idx]] %>%
   st_drop_geometry() %>%
   group_by(nbhd_id) %>%
   summarize(pop = sum(pop)) %>%
   pull(pop) %>% quantile()

#----------------------------------------------------------------------------

# Figure 4

state_places <- read_rds("other_data/PA_places.rds")

philly_geo <- state_places %>%
  filter(place == "60000")

philly_geo <- philly_geo %>%
  censable::breakdown_geoid(GEOID)

zcta <- tigris::zctas( year = 2020)|>
  filter(ZCTA5CE20 %in% get_zctas_by_state(states='PA'))




zcta_matches <- geomander::geo_match(philly_geo, zcta)

philly_geo$zcta <- zcta$ZCTA5CE20[zcta_matches]

#--------------------------------------------------------------------
pzcta <- philly_geo %>%
  group_by(zcta) %>%
  summarize(geometry = st_union(geometry))

ptract <- philly_geo %>%
  group_by(tract) %>%
  summarize(geometry = st_union(geometry))

pbg <- philly_geo %>%
  mutate(gp = paste0(tract, block_group)) %>%
  group_by(gp) %>%
  summarize(geometry = st_union(geometry))

(pgeo1 <- ptract %>%
    mutate(nid = 1:nrow(ptract)) %>%
    ggplot() +
    geom_sf(data = philly, fill = NA, color = "black") +
    geom_sf(aes(fill = factor(nid)),
            color = "black", lwd = 0.1) +
    ggthemes::theme_map() +
    ggredist::scale_fill_randmcnally() +
    guides(fill = "none") +
    labs(title = "Tracts") +
    theme(plot.margin = unit(c(0.25,0,0,0), "cm"),
          plot.title = element_text(hjust = 0.5, face = "bold",
                                    size = 15))
)

(pgeo2 <- pzcta %>%
    mutate(nid = 1:nrow(pzcta)) %>%
    ggplot() +
    geom_sf(data = philly, fill = NA, color = "black") +
    geom_sf(aes(fill = factor(nid)),
            color = "black", lwd = 0.1) +
    ggthemes::theme_map() +
    ggredist::scale_fill_randmcnally() +
    guides(fill = "none") +
    labs(title = "ZCTAs") +
    theme(plot.margin = unit(c(0.25,0,0,0), "cm"),
          plot.title = element_text(hjust = 0.5, face = "bold",
                                    size = 15))
)

(pgeo3 <- pbg %>%
    mutate(nid = 1:nrow(pbg)) %>%
    ggplot() +
    geom_sf(data = philly, fill = NA, color = "black") +
    geom_sf(aes(fill = factor(nid)),
            color = "black", lwd = 0.1) +
    ggthemes::theme_map() +
    ggredist::scale_fill_randmcnally() +
    guides(fill = "none") +
    labs(title = "Block Groups") +
    theme(plot.margin = unit(c(0.25,0,0,0), "cm"),
          plot.title = element_text(hjust = 0.5, face = "bold",
                                    size = 15))
)





(pmap <- dd %>%
    mutate(nid = 1:nrow(dd)) %>%
    ggplot() +
    geom_sf(data = philly, fill = NA, color = "black") +
    geom_sf(aes(fill = factor(nid)),
            color = "black", lwd = 0.1) +
    ggthemes::theme_map() +
    ggredist::scale_fill_randmcnally() +
    guides(fill = "none") +
    labs(title = "Planning Neighborhoods") +
    theme(plot.margin = unit(c(0.25,0,0,0), "cm"),
          plot.title = element_text(hjust = 0.5, face = "bold",
                                    size = 15))
)

design <- "
AABB
AABB
CCDD
CCDD"

p <- pmap + pgeo1 + pgeo3 + pgeo2 + plot_layout(nrow = 1, design = design)
ggsave("output/philly_versions.png", width = 6, height = 6)



#### Figure 5: Scatter plots of neighborhood demographics against zcta, tract, and block group demographics

l = zctas |>
  mutate(p_black = pop_black/pop)|>
  group_by(filename)|>
  summarise(avg_pop = mean(pop, na.rm=T),
            p_black = mean(p_black[!is.infinite(p_black)], na.rm=T),
            res = mean(res,na.rm=T)) |>
  mutate(geo = 'ZCTA')|>
  bind_rows(
    tracts |>
      mutate(p_black = pop_black/pop)|>
      group_by(filename)|>
      summarise(avg_pop = mean(pop, na.rm=T),
                p_black = mean(p_black[!is.infinite(p_black)], na.rm=T),
                res = mean(res,na.rm=T)) |>
      mutate(geo = 'Tract')
  )|>
  bind_rows(
    block_groups |>
      mutate(p_black = pop_black/pop)|>
      group_by(filename)|>
      summarise(avg_pop = mean(pop, na.rm=T),
                p_black = mean(p_black[!is.infinite(p_black)], na.rm=T),
                res = mean(res,na.rm=T)) |>
      mutate(geo = 'Block Group')
  )


# merge this to neighborhood summaries

p_black <- lapply(cities, function(city_df) {

  ret_df <- NULL

  if (!sum(is.na(city_df$nbhd_id)) == nrow(city_df)) {
    x1 <- city_df %>%
      st_drop_geometry() %>%
      group_by(nbhd_id) %>%
      summarize(total_pop = sum(pop,na.rm=T),
                total_pop_black = sum(pop_black,na.rm=T),
                filename = unique(filename))

    x1$type <- "NBHD"
    x1 <- x1 %>% rename(id = nbhd_id)

    ret_df <- bind_rows(ret_df, x1)
  }

  if (!sum(is.na(city_df$nha_id)) == nrow(city_df)) {
    x2 <- city_df %>%
      st_drop_geometry() %>%
      group_by(nha_id) %>%
      summarize(total_pop = sum(pop,na.rm=T),
                total_pop_black = sum(pop_black,na.rm=T),
                filename = unique(filename))

    x2$type <- "NHA"
    x2 <- x2 %>% rename(id = nha_id)

    ret_df <- bind_rows(ret_df, x2)
  }

  ret_df

})


neighs <- pop_size |>
  do.call(what='bind_rows') |>
  mutate(city=str_extract(filename, "(?<=cdnd_data/)[^/]+"))|>
  group_by(city)|>
  summarise(pop_neigh = mean(total_pop, na.rm=T)) |>
  left_join( p_black |>
               do.call(what='bind_rows') |>
               mutate(p_black = total_pop_black/total_pop,
                      city=str_extract(filename, "(?<=cdnd_data/)[^/]+"))|>
               group_by(city)|>
               summarise(p_black_neigh = mean(p_black[!is.infinite(p_black)], na.rm=T)) )|>
  left_join( areas |>
               do.call(what='bind_rows') |>
               mutate(city=str_extract(filename, "(?<=cdnd_data/)[^/]+"))|>
               group_by(city)|>
               summarise(res_neigh = mean(as.numeric(res), na.rm=T)) )

l = l |>
  mutate(city=str_extract(filename, "(?<=cdnd_data/)[^/]+"))|>
  select(-filename)|>
  left_join(neighs, by = 'city')

gc()

colors = c(`Block Group` = "#9A3324",
           `Tract` = "#00274C",
           `ZCTA` = "#FFCB05",
           `other` = 'lightgrey')

p = l |>
  mutate(res = as.numeric(res),
         log_pop = log(avg_pop),
         log_area = log(res))|>
  select(log_pop ,p_black  ,     log_area, geo  , city)|>
  pivot_longer(log_pop:log_area) |>
  left_join(l |>
              mutate(res = as.numeric(res_neigh),
                     log_pop = log(pop_neigh),
                     log_area = log(res))|>
              select(log_pop ,p_black = p_black_neigh  ,     log_area , geo  , city)|>
              pivot_longer(log_pop:log_area) |>
              select(geo, city, name, value_neigh = value), by = c('city', 'name', 'geo'))|>
  mutate(stat = case_when(name == 'log_pop' ~ 'Logged pop.',
                          name == 'p_black' ~ 'Prop. Black',
                          name == 'log_area' ~ 'Logged sq. miles'))

limits1 <- p |>
    filter(name == 'log_pop') |>
    group_by(geo, stat) |>
    summarise(xmin = min(value,na.rm=T), xmax = max(value,na.rm=T),
              ymin = min(value_neigh,na.rm=T), ymax = max(value_neigh,na.rm=T), .groups = 'drop')



plot_1 = p |>
    filter(name == 'log_pop') |>
    ggplot(aes(x = value, y = value_neigh, color = geo)) +
    geom_point() +
    geom_smooth(method = 'lm', color = 'black', linetype = 'dashed') +
    scale_color_manual(values = colors) +
    theme_bw() +
    ylab('Neighborhood') +
    xlab('Census comparison') +
    guides(color = 'none') +
    geom_abline(intercept = 0 , slope = 1, color = 'red') +
    theme(legend.title = element_blank(),
          text = element_text(size = 15),
          legend.position = 'bottom') +
    facet_grid(cols = vars(geo), rows = vars(stat), scales = 'free') +
    scale_x_continuous(limits = c(min(c(limits1$xmin, limits1$ymin)), max(c(limits1$xmax, limits1$ymax)))) +
    scale_y_continuous(limits = c(min(c(limits1$xmin, limits1$ymin)), max(c(limits1$xmax, limits1$ymax))))


limits2 <- p |>
    filter(name == 'log_area') |>
    group_by(geo, stat) |>
    summarise(xmin = min(value,na.rm=T), xmax = max(value,na.rm=T),
              ymin = min(value_neigh,na.rm=T), ymax = max(value_neigh,na.rm=T), .groups = 'drop')




plot_2 = p |>
    filter(name == 'log_area') |>
    ggplot(aes(x = value, y = value_neigh, color = geo)) +
    geom_point() +
    geom_smooth(method = 'lm', color = 'black', linetype = 'dashed') +
    scale_color_manual(values = colors) +
    theme_bw() +
    ylab('Neighborhood') +
    xlab('Census comparison') +
    guides(color = 'none') +
    geom_abline(intercept = 0 , slope = 1, color = 'red') +
    theme(legend.title = element_blank(),
          text = element_text(size = 15),
          legend.position = 'bottom') +
    facet_grid(cols = vars(geo), rows = vars(stat), scales = 'free') +
    scale_x_continuous(limits = c(min(c(limits2$xmin, limits2$ymin)), max(c(limits2$xmax, limits2$ymax)))) +
    scale_y_continuous(limits = c(min(c(limits2$xmin, limits2$ymin)), max(c(limits2$xmax, limits2$ymax))))

limits3 <- p |>
    filter(name == 'p_black') |>
    group_by(geo, stat) |>
    summarise(xmin = min(value,na.rm=T), xmax = max(value,na.rm=T),
              ymin = min(value_neigh,na.rm=T), ymax = max(value_neigh,na.rm=T), .groups = 'drop')


plot_3 = p |>
    filter(name == 'p_black') |>
    ggplot(aes(x = value, y = value_neigh, color = geo)) +
    geom_point() +
    geom_smooth(method = 'lm', color = 'black', linetype = 'dashed') +
    scale_color_manual(values = colors) +
    theme_bw() +
    ylab('Neighborhood') +
    xlab('Census comparison') +
    guides(color = 'none') +
    geom_abline(intercept = 0 , slope = 1, color = 'red') +
    theme(legend.title = element_blank(),
          text = element_text(size = 15),
          legend.position = 'bottom') +
    facet_grid(cols = vars(geo), rows = vars(stat), scales = 'free') +
    scale_x_continuous(limits = c(min(c(limits3$xmin, limits3$ymin)), max(c(limits3$xmax, limits3$ymax)))) +
    scale_y_continuous(limits = c(min(c(limits3$xmin, limits3$ymin)), max(c(limits3$xmax, limits3$ymax))))






plot_1/plot_2/plot_3
ggsave('output/census-comp-scatter.png', dpi=300, width = 9, height = 9)





